\relax 
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}}
\citation{MapReduce}
\citation{MapReduce}
\citation{HPCVM}
\citation{Hadoop}
\citation{Hadoop}
\citation{MapReduce}
\@writefile{toc}{\contentsline {section}{\numberline {2}Problem Description - The Inverted Index}{2}}
\newlabel{sec:problem}{{2}{2}}
\citation{HPCVM}
\citation{HPCVM}
\citation{HPCVM}
\citation{Cloudlet}
\citation{MRVM}
\citation{MPI}
\citation{Hadoop}
\citation{MPI}
\citation{MPI}
\citation{MPI}
\citation{MPI}
\@writefile{toc}{\contentsline {section}{\numberline {3}Literature Review}{3}}
\newlabel{sec:litreview}{{3}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Why MapReduce?}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}The Issue of Virtual Machines}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Implementing MapReduce}{3}}
\citation{white2010hadoop}
\citation{Hadoop}
\citation{Hadoop}
\citation{InvInd}
\citation{Book}
\citation{Book}
\citation{Book}
\citation{MapReduce}
\citation{MapReduce}
\citation{MPI}
\citation{MPI}
\citation{HPCVM}
\citation{Hadoop}
\citation{MRVM}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Inverted Indexing}{4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.5}Performance}{4}}
\citation{clueweb}
\citation{wikidumps}
\citation{hadooptutorial2011}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.6}Synthesis}{5}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Implementation}{5}}
\newlabel{sec:implementation}{{4}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Data Sets}{5}}
\newlabel{datasets}{{4.1}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Hadoop}{5}}
\newlabel{sec:hadoop}{{4.2}{5}}
\citation{white2010hadoop}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Hadoop Multi-node Cluster Management Services \cite  {hadooptutorial2011}}}{6}}
\newlabel{fig:hadoopScheme}{{1}{6}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces MapReduce Execution Scheme \cite  {white2010hadoop}}}{6}}
\newlabel{fig:mapReduceScheme}{{2}{6}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.1}Hadoop Inverted Index Simple}{6}}
\newlabel{hadoopSimple}{{4.2.1}{6}}
\newlabel{lst:sp}{{1}{7}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {1}Hadoop inverted index Simple (Mapper)}{7}}
\newlabel{lst:sp}{{2}{7}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {2}Hadoop inverted index Simple (Reducer)}{7}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.2}Hadoop Inverted Index Combiner}{7}}
\newlabel{hadoopCombiner}{{4.2.2}{7}}
\newlabel{lst:sp}{{3}{8}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {3}Hadoop inverted index Combiner (Mapper)}{8}}
\newlabel{lst:sp}{{4}{9}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {4}Hadoop inverted index Combiner (Reducer)}{9}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}MPI}{9}}
\newlabel{sec:mpi}{{4.3}{9}}
\newlabel{lst:sp}{{5}{10}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {5}MPI inverted index Mapper with Combiner}{10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Sequential}{10}}
\newlabel{sec:sequential}{{4.4}{10}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Experimentation}{10}}
\newlabel{sec:experimentation}{{5}{10}}
\newlabel{lst:sp}{{6}{11}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6}MPI inverted index Mapper Communication}{11}}
\newlabel{lst:sp}{{7}{11}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {7}MPI inverted index Reducer}{11}}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Experiments Summary (Implementations vs. Datasets)}}{11}}
\newlabel{tab:expsummary}{{1}{11}}
\citation{WikiStats}
\citation{WebStats}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Metrics and Measured Items}{12}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Experimental Environment}{13}}
\newlabel{enviroment}{{5.2}{13}}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Host Machine Subject Description}}{13}}
\newlabel{tab:machines}{{2}{13}}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Virtual Node Subject Description}}{13}}
\newlabel{tab:nodes}{{3}{13}}
\@writefile{toc}{\contentsline {section}{\numberline {6}Results}{13}}
\newlabel{sec:results}{{6}{13}}
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Experiment Results - Generated Index Size Comparison}}{13}}
\newlabel{tab:indexSize}{{4}{13}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Filesystem Usage - Experimental Results}}{14}}
\newlabel{fig:filesystemResults}{{3}{14}}
\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces Experiment Results - Filesystem Usage (Minutes)}}{14}}
\newlabel{tab:diskUsage}{{5}{14}}
\@writefile{lot}{\contentsline {table}{\numberline {6}{\ignorespaces Experiment Results - Total Map Phase Time Comparison}}{14}}
\newlabel{tab:mapTime}{{6}{14}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Calculated inverted index Size - Experimental Results}}{15}}
\newlabel{fig:indexSizeResults}{{4}{15}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Map Phase Execution Time - Experimental Results}}{15}}
\newlabel{fig:mapPhaseTime}{{5}{15}}
\@writefile{lot}{\contentsline {table}{\numberline {7}{\ignorespaces Experiment Results - Total Time Comparison}}{15}}
\newlabel{tab:totalTime}{{7}{15}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Total Execution Time - Experimental Results}}{16}}
\newlabel{fig:totalTime}{{6}{16}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Hadoop Simple - ClueWeb 1 GB Network Results}}{16}}
\newlabel{fig:hadoopSimpleWeb1Network}{{7}{16}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Hadoop Simple - Wiki 1 GB Network Results}}{17}}
\newlabel{fig:hadoopSimpleWiki1Network}{{8}{17}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Hadoop Simple - ClueWeb 4 GB Network Results}}{17}}
\newlabel{fig:hadoopSimpleWeb4Network}{{9}{17}}
\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces Hadoop Simple - Wiki 4 GB Network Results}}{17}}
\newlabel{fig:hadoopSimpleWiki4Network}{{10}{17}}
\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces Hadoop Combiner - ClueWeb 1 GB Network Results}}{18}}
\newlabel{fig:hadoopCombinerWeb1Network}{{11}{18}}
\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces Hadoop Combiner - Wiki 1 GB Network Results}}{18}}
\newlabel{fig:hadoopCombinerWiki1Network}{{12}{18}}
\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Hadoop Combiner - ClueWeb 4 GB Network Results}}{18}}
\newlabel{fig:hadoopCombinerWeb4Network}{{13}{18}}
\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces Hadoop Combiner - Wiki 4 GB Network Results}}{19}}
\newlabel{fig:hadoopCombinerWiki4Network}{{14}{19}}
\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces MPI - ClueWeb 1 GB Network Results}}{19}}
\newlabel{fig:MPIWeb1Network}{{15}{19}}
\@writefile{toc}{\contentsline {section}{\numberline {7}Analysis}{19}}
\newlabel{sec:analysis}{{7}{19}}
\@writefile{toc}{\contentsline {subsection}{\numberline {7.1}Inverted Index Size}{19}}
\@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces MPI - Wiki 1 GB Network Results}}{20}}
\newlabel{fig:MPIWiki1Network}{{16}{20}}
\@writefile{lof}{\contentsline {figure}{\numberline {17}{\ignorespaces MPI - ClueWeb 4 GB Network Results}}{20}}
\newlabel{fig:MPIWeb4Network}{{17}{20}}
\@writefile{lof}{\contentsline {figure}{\numberline {18}{\ignorespaces MPI - Wiki 4 GB Network Results}}{21}}
\newlabel{fig:MPIWiki4Network}{{18}{21}}
\@writefile{toc}{\contentsline {subsection}{\numberline {7.2}Disk and Memory Usage}{21}}
\newlabel{sec:usageAnalysis}{{7.2}{21}}
\@writefile{toc}{\contentsline {subsection}{\numberline {7.3}Map Phase Time}{22}}
\@writefile{toc}{\contentsline {subsection}{\numberline {7.4}Total Time}{22}}
\newlabel{sec:totalTime}{{7.4}{22}}
\@writefile{toc}{\contentsline {subsection}{\numberline {7.5}Network Analysis}{23}}
\@writefile{toc}{\contentsline {subsection}{\numberline {7.6}Hadoop}{23}}
\@writefile{toc}{\contentsline {subsection}{\numberline {7.7}MPI}{23}}
\@writefile{toc}{\contentsline {section}{\numberline {8}Conclusions}{24}}
\@writefile{toc}{\contentsline {section}{\numberline {9}Lessons}{25}}
\@writefile{toc}{\contentsline {subsection}{\numberline {9.1}Environment}{25}}
\@writefile{toc}{\contentsline {subsection}{\numberline {9.2}Memory Management}{25}}
\bibstyle{abbrv}
\bibdata{../bib/references}
\bibcite{MapReduce}{1}
\bibcite{MPI}{2}
\bibcite{HPCVM}{3}
\bibcite{Cloudlet}{4}
\@writefile{toc}{\contentsline {section}{\numberline {10}Authors Contribution}{26}}
\bibcite{MRVM}{5}
\bibcite{Hadoop}{6}
\bibcite{WebStats}{7}
\bibcite{Book}{8}
\bibcite{InvInd}{9}
\bibcite{hadooptutorial2011}{10}
\bibcite{clueweb}{11}
\bibcite{white2010hadoop}{12}
\bibcite{wikidumps}{13}
\bibcite{WikiStats}{14}
